#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import PyPDF2
import json
import sys

def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, each page
        followed by a newline, or ``None`` if the file could not be
        opened or parsed (the error message is printed).
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)

            # Build the result in one pass with join instead of repeated
            # string concatenation. A falsy result from extract_text() is
            # treated as an empty page so that a single page without
            # extractable text cannot abort the whole document.
            return "".join(
                (page.extract_text() or "") + "\n"
                for page in pdf_reader.pages
            )
    except Exception as e:
        # Deliberately broad: this is the boundary where any I/O or parsing
        # failure is reported and turned into a ``None`` result for callers.
        print(f"提取PDF文本时出错: {e}")
        return None

def main(pdf_path="时培豪.pdf", output_path="extracted_text.txt",
         preview_chars=1000):
    """Extract the text of a PDF, save it to a file and print a preview.

    Args:
        pdf_path: Path of the PDF file to read. Defaults to the file the
            script was originally written for.
        output_path: Path of the UTF-8 text file the extracted text is
            written to.
        preview_chars: Maximum number of characters of the extracted text
            echoed to the console; longer text is truncated and suffixed
            with "...".

    Returns:
        None. Progress and result messages are printed.
    """
    print("正在提取PDF文本...")
    text = extract_text_from_pdf(pdf_path)

    # Guard clause: ``None`` (extraction error) and an empty string are both
    # reported as a failure, and no output file is written.
    if not text:
        print("PDF文本提取失败！")
        return

    # Persist the full extracted text.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)

    print("PDF文本提取完成！")
    print(f"文本已保存到 {output_path}")
    print("\n--- 提取的文本预览 ---")

    # Only show the head of long documents to keep console output readable.
    if len(text) > preview_chars:
        print(text[:preview_chars] + "...")
    else:
        print(text)

# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()